* Session setup for the EIN mover build ;
* Compress every SAS data set created from here on ;
options compress=YES;
* Library DATA - the path is blank in this copy and presumably has to be filled in per site (TODO confirm) ;
libname data ''; 
* Library MOVER receives the per-segment temp_seg files and the final mover.ein data set ;
libname mover ''; 
* Macro variable DATA is the folder of the zipped inputs. It is prepended directly to the zip file names in the FILENAME statements below, so it needs to end with a path separator ;
%let data =  ;

* PREP EIN MOVERS ;

/*----------------------------------------------------------------------
  DERPREP - build the EIN-change (mover) records for one DER segment.

  Parameters
    num      segment suffix as text (for example 02). The macro reads
             member der_seg<num>.sas7bdat from der_seg<num>.zip in the
             folder named by the global macro variable DATA.
    firstyr  first calendar year of the wide EIN array (default 1978).
    lastyr   last calendar year of the wide EIN array  (default 2018).

  Output
    mover.temp_seg<num> with variables ssn, year and nein (the EIN as a
    number): one row for every SSN and year whose EIN differs from the
    EIN recorded for that SSN in the previous calendar year. The first
    year of the window is always kept when an EIN is present.

  Side effects
    Assigns filerefs INZIP and DS, and leaves work.der_seg<num> behind.
----------------------------------------------------------------------*/
%macro derprep(num, firstyr=1978, lastyr=2018) ;
filename inzip ZIP "&data.der_seg&num..zip"
               member = "der_seg&num..sas7bdat";
filename ds "%sysfunc(getoption(work))\der_seg&num..sas7bdat" ;

/* Copy the zip member byte-for-byte into the WORK folder so that it   */
/* can be read as work.der_seg<num>. Input is taken in fixed 256-byte  */
/* records, LENGTH= gives the size of the (possibly shorter) final     */
/* record, and RECFM=N writes the bytes out as an unbroken stream.     */
data _null_;
   infile inzip
    lrecl=256 recfm=F length=length eof=eof unbuf;
   file   ds lrecl=256 recfm=N;
   input;
   put _infile_ $varying256. length;
   return;
 eof:
   stop;
run;

/* Keep the four variables needed and turn the character year into a   */
/* numeric one.                                                        */
data work.der_seg&num.;
    set work.der_seg&num.
    (
	keep = ssn year ein wage_tips_irs 
    rename=(wage_tips_irs=earn year=char_year)
    );

	year = input(char_year,4.) ;
	drop char_year ;
run ;

/* One row per SSN-EIN-year: order so that the highest earnings come   */
/* first, then keep the first row of each key.                         */
/* NOTE(review): earn is dropped immediately afterwards, so the        */
/* descending-earn ordering does not influence anything that is kept.  */
proc sort data=work.der_seg&num. out=work.der_seg&num. ;
	by ssn ein year descending earn ;
run;

proc sort data=work.der_seg&num. out=work.der_seg&num. nodupkey ;
	by ssn ein year ;
run;

data work.der_seg&num. ;
	set work.der_seg&num. 
	(keep = ssn ein year) ;

run ;

/* Print the observed year range, useful for checking it against the   */
/* firstyr/lastyr window.                                              */
proc means min max data=work.der_seg&num. ;
		var year ;
	run ;

/* Reshape wide: one row per SSN with ein<firstyr>-ein<lastyr>.        */
/* Rows arrive in ssn-ein-year order, so when an SSN has several EINs  */
/* in the same year the EIN that sorts last overwrites the others.     */
/* NOTE(review): earnings play no part in that choice - confirm that   */
/* this is the intended rule.                                          */
data work.der_seg&num. ;
	set work.der_seg&num. end=_eof ;
	by ssn;

	keep 	ssn 
			ein&firstyr.-ein&lastyr.
			;	

	retain 	ein&firstyr.-ein&lastyr. ;

	array	aein(&firstyr.:&lastyr.)	$9 ein&firstyr.-ein&lastyr. ;

	/* Start every SSN from an all-blank set of years. */
	if first.ssn then
	do;
		do i=&firstyr. to &lastyr.;
			aein(i) = "";
		end;
	end;

	/* A missing or out-of-window year would otherwise stop the step   */
	/* with an array subscript error, so such rows are skipped and     */
	/* counted instead.                                                */
	if &firstyr. <= year <= &lastyr. then aein(year) = ein ;
	else _nskip + 1 ;

	if last.ssn then output;

	/* Report skipped rows once, at the end of the input. */
	if _eof and _nskip > 0 then
		put "WARNING: derprep(&num.): " _nskip
		    "record(s) with year outside &firstyr.-&lastyr. were ignored." ;

run;

/* Blank out every year whose EIN equals the EIN of the year before.   */
/* The character EIN is converted to a number explicitly (9. informat, */
/* matching the $9 storage) rather than through implicit conversion.   */
/* A blank EIN becomes a missing value.                                */
data work.der_seg&num. ;
	set work.der_seg&num. ;

		array cein(&firstyr.:&lastyr.) ein&firstyr.-ein&lastyr. ;
		array nein(&firstyr.:&lastyr.) nein&firstyr.-nein&lastyr. ;

		/* The first year has no predecessor and is always carried over. */
		nein(&firstyr.) = input(cein(&firstyr.), 9.) ;
		do i = %eval(&firstyr. + 1) to &lastyr. ;
			if cein(i) ^= cein(i-1) then nein(i) = input(cein(i), 9.) ;
				else nein(i) = . ;
		end ;

		drop ein: i ;
run ;

/* Reshape long: one row per SSN and year in the window. */
data work.der_seg&num. ;
	set work.der_seg&num. ;

	array 	anein(&firstyr.:&lastyr.) 	nein&firstyr.-nein&lastyr. ;
	
	do year = &firstyr. to &lastyr.;
		nein = anein(year);
		output;
	end;

	drop 	nein&firstyr.-nein&lastyr. ;	
run;

/* Keep only the years that carry a new EIN. */
data mover.temp_seg&num. ;
	set work.der_seg&num.
		(where=(nein^=.)) ;
run ;
%mend derprep ;

* Build mover.temp_segNN for each of the ten DER segments ;
%derprep(02) ; %derprep(03) ; %derprep(05) ; %derprep(06) ; %derprep(09) ;
%derprep(10) ; %derprep(15) ; %derprep(16) ; %derprep(19) ; %derprep(20) ;

* Stack the per-segment results into a single data set ;
data mover.ein ;
	set
		mover.temp_seg02 mover.temp_seg03 mover.temp_seg05 mover.temp_seg06
		mover.temp_seg09 mover.temp_seg10 mover.temp_seg15 mover.temp_seg16
		mover.temp_seg19 mover.temp_seg20
	;
run ;

* Sort in place by ssn and year, keeping the first row of each ssn-year pair ;
proc sort data=mover.ein nodupkey ;
	by ssn year ;
run ;

* Merge with Numident ;
* Step 1 - point INZIP at member num_per10.sas7bdat inside num_per10.zip and DS at a file of the same name in the WORK folder ;
filename inzip ZIP "&data.num_per10.zip"
               member = "num_per10.sas7bdat";
filename ds "%sysfunc(getoption(work))\num_per10.sas7bdat" ;

* Step 2 - copy the zip member byte-for-byte to the WORK folder so that it can be read as work.num_per10 ;
data _null_;
   * Input is read as fixed 256-byte records and LENGTH= holds the size of the current record ;
   infile inzip
    lrecl=256 recfm=F length=length eof=eof unbuf;
   * RECFM=N writes the output as a plain byte stream ;
   file   ds lrecl=256 recfm=N;
   input;
   * Write exactly LENGTH bytes so that a short final record is not padded ;
   put _infile_ $varying256. length;
   return;
 eof:
   stop;
run;

* Reduce the Numident extract to ssn and dob with one row per ssn (first row in file order wins) ;
proc sort data=work.num_per10 (keep = ssn dob)
          out=work.num_per10 nodupkey ;
	by ssn ;
run ;

* Attach dob to the mover rows. Every mover row is kept and Numident-only SSNs are discarded ;
data mover.ein ;
	merge 	mover.ein      (in=in_ein)
			work.num_per10 ;
	by ssn ;
	if in_ein ;
run ;

* Final pass - rename nein to ein and derive the numeric birth year byr from dob ;
data mover.ein ;
	set mover.ein (rename=(nein=ein)) ;

	* Characters 5 to 8 of dob are taken as the birth year ;
	byr_char = substr(dob,5,4) ;

	* A letter X anywhere in those four characters blanks the whole year, so byr ends up missing ;
	if index(byr_char,"X") > 0 then byr_char = "" ;

	byr = input(byr_char,4.) ;
	drop dob byr_char ;
run ;


run; quit;
